/*******************************************************************************
																				
	DESCRIPTION:  	This do file produces statistics based on the ensemble predictions generated 
					separately for each year, i.e., individuals from year X 
					with predictions generated using a model trained on year X.

*******************************************************************************/

clear all
global id_code 108_1

* Import programs:
run "${code}/Output Generation/108_0_MainStatistics_Program.do"

pause off
set seed 2110

* Import predictions:
foreach model in "Full_2006" "Full_SeqDrop_incIndiv_2006" "Full_Pooled_2006_2007" ///
	"Full_Pooled_2009_2010" "Full_2006_SplitByBetaD_BelowMedian" ///
	"Full_2006_SplitByBetaD_AboveMedian" "Linear" {

	local cond 

	if "`model'" == "Full_2006_SplitByBetaD_BelowMedian" {
		local model Full_2006
		local cond _SplitByBetaD_BelowMedian
	}
	
	if "`model'" == "Full_2006_SplitByBetaD_AboveMedian" {
		local model Full_2006
		local cond _SplitByBetaD_AboveMedian
	}

	if "`model'" == "Linear" {
		use "${data}/116_Linear_Predictions_Full_2006_xMonthPred_yMonthModel.dta", clear
	}
	else {
		use "${data}/003_MainWithEnsemblePred_`model'_xMonthPred_yMonthModel.dta", clear
	
	
		merge 1:1 LopNr_PersonNr InLnr using "${data}/003_MainWithEnsemblePred_`model'.dta", ///
		keepusing(p_emplAft6M_0M_In p_emplAft6M_6M_In p_emplAft6M_12M_In) ///
		assert(1 3) keep(3) nogen
		
		rename p_emplAft6M_0M_In p_emplAft6M_0M_In_0M_Mod
		rename p_emplAft6M_6M_In p_emplAft6M_6M_In_6M_Mod
		rename p_emplAft6M_12M_In p_emplAft6M_12M_In_12M_Mod
	}
	
	
	if inlist("`cond'", "_SplitByBetaD_BelowMedian", "_SplitByBetaD_AboveMedian")  {
		* Import duration dependence betas:
		cap frame drop betaD
		frame create betaD
		frame change betaD
			use "${data}/119_3_DurationDependenceBetas_`model'.dta", clear

			keep LopNr_PersonNr InLnr year beta_1_shrunk_ind

			* Check that distinct spells by the same individuals have the same betaD:
			bysort LopNr_PersonNr: egen temp = mean(beta_1_shrunk_ind)
			assert temp == beta_1_shrunk_ind

			* Drop duplicates:
			drop InLnr temp
			duplicates drop

			* Rank by betaD:
			cumul beta_1_shrunk_ind, gen(betaD_rank) equal
			sort betaD_rank
			gen betaD_above_med = betaD_rank > 0.5

		frame change default
		
		* Merge with betaD:
		frlink m:1 LopNr_PersonNr, frame(betaD)
		frget betaD_above_med, from(betaD)
		
		* Keep relevant observations:
		if "`cond'" == "_SplitByBetaD_BelowMedian" {
			keep if betaD_above_med == 0
		}
		
		if "`cond'" == "_SplitByBetaD_AboveMedian" {
			keep if betaD_above_med == 1
		}
		
	}

	
	if "`model'" == "Full_NoRecalls" {
		keep if recalled == 0
	}
		
	* Create frame to store results
	cap frame drop results
	frame create results str30(sample model) double(n mean_f mean_f_hat var_f_hat cov r2)
		
	* Build table:
	file open myfile using "${output}/${id_code}_MainStatistics_Table_`model'`cond'_xMonthPred_yMonthModel.tex", write replace

	file write myfile "\documentclass{article}" _newline ///
		"\usepackage{booktabs}" _newline ///
		"\usepackage[margin=1in]{geometry}" _newline ///
		"\begin{document}" _newline ///
		"\begin{table}[h] \centering" _newline ///
		"\footnotesize \begin{tabular}{l c c c c c c c}" _newline ///
		"\hline \hline \addlinespace[1.5ex]" _newline ///
		///
		"\textbf{Sample} & \textbf{Model} & N & \(E(F)\) & \(E(\hat{F})\) & \(Var(\hat{F})\)  & \(Cov(\hat{F}, F)\) & \(R^2(\hat{F}, F)\) \\ \addlinespace[0.1cm]" _newline ///
		" \hline \addlinespace[1.5ex]" _newline

	* 0M sample:
	stats emplAft6M_0M_In p_emplAft6M_0M_In_0M_Mod
	frame post results ("At Start of Spell") ("0M Model") (`r(n_ex)') (`r(mean_f_ex)') (`r(mean_f_hat_ex)') (`r(var_f_hat_ex)') (`r(cov_ex)') (`r(r2_ex)')
	file write myfile ///
		"At Start of Spell & 0M Model & `r(n)' & `r(mean_f)' & `r(mean_f_hat)'   & `r(var_f_hat)' & `r(cov)' & `r(r2)' \\ \addlinespace[0.2cm]" _newline
	
	stats emplAft6M_0M_In p_emplAft6M_0M_In_6M_Mod
	frame post results ("At Start of Spell") ("6M Model") (`r(n_ex)') (`r(mean_f_ex)') (`r(mean_f_hat_ex)') (`r(var_f_hat_ex)') (`r(cov_ex)') (`r(r2_ex)')
	file write myfile ///
		"& 6M Model & `r(n)' & `r(mean_f)' & `r(mean_f_hat)'   & `r(var_f_hat)' & `r(cov)' & `r(r2)' \\ \addlinespace[0.2cm]" _newline
	
	stats emplAft6M_0M_In p_emplAft6M_0M_In_12M_Mod
	frame post results ("At Start of Spell") ("12M Model") (`r(n_ex)') (`r(mean_f_ex)') (`r(mean_f_hat_ex)') (`r(var_f_hat_ex)') (`r(cov_ex)') (`r(r2_ex)')
	file write myfile ///
		"& 12M Model & `r(n)' & `r(mean_f)' & `r(mean_f_hat)'   & `r(var_f_hat)' & `r(cov)' & `r(r2)' \\ \addlinespace[0.2cm]" _newline

	file write myfile "\hline \addlinespace[0.2cm]" _newline

	* 6M sample:
	stats emplAft6M_6M_In p_emplAft6M_6M_In_6M_Mod
	frame post results ("6M into Spell") ("6M Model") (`r(n_ex)') (`r(mean_f_ex)') (`r(mean_f_hat_ex)') (`r(var_f_hat_ex)') (`r(cov_ex)') (`r(r2_ex)')
	file write myfile ///
		"6M into Spell & 6M Model & `r(n)' & `r(mean_f)' & `r(mean_f_hat)'   & `r(var_f_hat)' & `r(cov)' & `r(r2)' \\ \addlinespace[0.2cm]" _newline
	
	stats emplAft6M_6M_In p_emplAft6M_6M_In_12M_Mod
	frame post results ("6M into Spell") ("12M Model") (`r(n_ex)') (`r(mean_f_ex)') (`r(mean_f_hat_ex)') (`r(var_f_hat_ex)') (`r(cov_ex)') (`r(r2_ex)')
	file write myfile ///
		"& 12M Model & `r(n)' & `r(mean_f)' & `r(mean_f_hat)'   & `r(var_f_hat)' & `r(cov)' & `r(r2)' \\ \addlinespace[0.2cm]" _newline
	
	file write myfile "\hline \addlinespace[0.2cm]" _newline

	* 12M sample:
	stats emplAft6M_12M_In p_emplAft6M_12M_In_12M_Mod
	frame post results ("12M into Spell") ("12M Model") (`r(n_ex)') (`r(mean_f_ex)') (`r(mean_f_hat_ex)') (`r(var_f_hat_ex)') (`r(cov_ex)') (`r(r2_ex)')
	file write myfile ///
		"12M into Spell & 12M Model & `r(n)' & `r(mean_f)' & `r(mean_f_hat)'   & `r(var_f_hat)' & `r(cov)' & `r(r2)' \\ \addlinespace[0cm]" _newline

	* Finish table:
	file write myfile ///
		"\addlinespace[1.5ex]" _newline ///
		"\hline \hline \addlinespace[1.5ex]" _newline ///
		"\end{tabular}" _newline ///
		"\end{table}" _newline ///
		"\end{document}"

	file close myfile
	frame results: export delimited using "${output}/${id_code}_MainStatistics_Table_`model'`cond'_xMonthPred_yMonthModel.csv", replace
}

********************************************************************************
* TIME SERIES
********************************************************************************

* Build table:
file open myfile using "${output}/${id_code}_MainStatistics_Table_Selection_AllYears.tex", write replace

file write myfile "\documentclass{article}" _newline ///
	"\usepackage{booktabs}" _newline ///
	"\usepackage[margin=1in]{geometry}" _newline ///
	"\begin{document}" _newline ///
	"\begin{table}[h] \centering" _newline ///
	"\footnotesize \begin{tabular}{l c c c c c c c}" _newline ///
	"\hline \hline \addlinespace[3ex]" _newline ///
	///
	"\textbf{Model} & \textbf{Year} & \textbf{0M to 6M} & \textbf{Selection} & \textbf{Fraction} & \textbf{6M to 12M} & \textbf{Selection} & \textbf{Fraction} \\ \addlinespace[0.1cm]" _newline ///
	" \hline \addlinespace[1.5ex]" _newline

cap frame drop time_series
frame create time_series str30(model) year emp_decl1 sel_lb1 sel_lb_ratio1 emp_decl2 sel_lb2 sel_lb_ratio2

* Compute statistics of interest for yearly models:
forval year = 1992/2016 {
	local model Full
	local vars _
	use "${data}/003_MainWithEnsemblePred_`model'_`year'_xMonthPred_yMonthModel.dta", clear

	merge 1:1 LopNr_PersonNr InLnr using "${data}/003_MainWithEnsemblePred_`model'`vars'`year'.dta", ///
	keepusing(p_emplAft6M_0M_In p_emplAft6M_6M_In p_emplAft6M_12M_In) ///
	assert(1 3) keep(3) nogen
	rename p_emplAft6M_0M_In p_emplAft6M_0M_In_0M_Mod
	rename p_emplAft6M_6M_In p_emplAft6M_6M_In_6M_Mod
	rename p_emplAft6M_12M_In p_emplAft6M_12M_In_12M_Mod
	
	* Compute stats on 0M sample + 6M model:
	stats emplAft6M_0M_In p_emplAft6M_0M_In_6M_Mod
	local e_f0 = r(mean_f_ex)
	local cov_f0_fhat6 = r(cov_ex)
	local sel_lb1 = `cov_f0_fhat6' / (1 - `e_f0')

	* Compute stats on 6M sample + 6M model:
	stats emplAft6M_6M_In p_emplAft6M_6M_In_6M_Mod
	local e_f6 = r(mean_f_ex)
	local emp_decl1 = `e_f0' - `e_f6'
	local sel_lb_ratio1 = `sel_lb1' / `emp_decl1'
	
	* Compute stats on 6M sample + 12M model:
	stats emplAft6M_6M_In p_emplAft6M_6M_In_12M_Mod
	local cov_f6_fhat12 = r(cov_ex)
	local sel_lb2 = `cov_f6_fhat12' / (1 - `e_f6')

	* Compute stats on 12M sample + 12M model:
	stats emplAft6M_12M_In p_emplAft6M_12M_In_12M_Mod
	local e_f12 = r(mean_f_ex)
	local emp_decl2 = `e_f6' - `e_f12'
	local sel_lb_ratio2 = `sel_lb2' / `emp_decl2'
	
	* Post to frame:
	frame post time_series ("Full") (`year') (`emp_decl1') (`sel_lb1') (`sel_lb_ratio1') ///
		(`emp_decl2') (`sel_lb2') (`sel_lb_ratio2') 
	
	file write myfile ///
		"Baseline & `year' & `: di %9.4f `emp_decl1'' & `: di %9.4f `sel_lb1'' & `: di %9.4f `sel_lb_ratio1'' &  `: di %9.4f `emp_decl2'' & `: di %9.4f `sel_lb2'' & `: di %9.4f `sel_lb_ratio2'' \\ \addlinespace[0.2cm]" _newline

}

* Compute statistics of interest for pooled models:
foreach year in Pooled_2006_2007 Pooled_2009_2010 {
	local model Full
	use "${data}/003_MainWithEnsemblePred_`model'_`year'_xMonthPred_yMonthModel.dta", clear

	merge 1:1 LopNr_PersonNr InLnr using "${data}/003_MainWithEnsemblePred_`model'_`year'.dta", ///
	keepusing(p_emplAft6M_0M_In p_emplAft6M_6M_In p_emplAft6M_12M_In) ///
	assert(1 3) keep(3) nogen
	
	rename p_emplAft6M_0M_In p_emplAft6M_0M_In_0M_Mod
	rename p_emplAft6M_6M_In p_emplAft6M_6M_In_6M_Mod
	rename p_emplAft6M_12M_In p_emplAft6M_12M_In_12M_Mod	
	
	* Compute stats on 0M sample + 6M model:
	stats emplAft6M_0M_In p_emplAft6M_0M_In_6M_Mod
	local e_f0 = r(mean_f_ex)
	local cov_f0_fhat6 = r(cov_ex)
	local sel_lb1 = `cov_f0_fhat6' / (1 - `e_f0')

	* Compute stats on 6M sample + 6M model:
	stats emplAft6M_6M_In p_emplAft6M_6M_In_6M_Mod
	local e_f6 = r(mean_f_ex)
	local emp_decl1 = `e_f0' - `e_f6'
	local sel_lb_ratio1 = `sel_lb1' / `emp_decl1'
	
	* Compute stats on 6M sample + 12M model:
	stats emplAft6M_6M_In p_emplAft6M_6M_In_12M_Mod
	local cov_f6_fhat12 = r(cov_ex)
	local sel_lb2 = `cov_f6_fhat12' / (1 - `e_f6')

	* Compute stats on 12M sample + 12M model:
	stats emplAft6M_12M_In p_emplAft6M_12M_In_12M_Mod
	local e_f12 = r(mean_f_ex)
	local emp_decl2 = `e_f6' - `e_f12'
	local sel_lb_ratio2 = `sel_lb2' / `emp_decl2'
	
	* Post to frame:
	frame post time_series ("`year'") (0) (`emp_decl1') (`sel_lb1') (`sel_lb_ratio1') ///
		(`emp_decl2') (`sel_lb2') (`sel_lb_ratio2') 
	
	file write myfile ///
		"Baseline & `year' & `: di %9.4f `emp_decl1'' & `: di %9.4f `sel_lb1'' & `: di %9.4f `sel_lb_ratio1'' &  `: di %9.4f `emp_decl2'' & `: di %9.4f `sel_lb2'' & `: di %9.4f `sel_lb_ratio2'' \\ \addlinespace[0.2cm]" _newline

}

* Finish table:
file write myfile ///
	"\addlinespace[1.5ex]" _newline ///
	"\hline \hline \addlinespace[1.5ex]" _newline ///
	"\end{tabular}" _newline ///
	"\end{table}" _newline ///
	"\end{document}"

file close myfile

* Fix time for pooled models:
frame change time_series	
replace year = 1998.5 if model == "Pooled_1998_1999"
replace year = 2006.5 if model == "Pooled_2006_2007"
replace year = 2009.5 if model == "Pooled_2009_2010"
replace year = 2014.5 if model == "Pooled_2014_2015"

sort year
	
* Merge with unemployment rate:
merge 1:1 year using "${data}/101_Employed_Unemployed.dta", ///
	keepusing(shareUnempSS) nogen
drop if year == 2017

* Generate the variable and local needed for indication of recessions
gen barUpper = 0
replace barUpper = 0.2

local barcall barUpper year if inrange(year, 1992, 1993)| inrange(year, 2008, 2009) | inrange(year, 2012, 2012), lcolor(gs14%50) fcolor(gs14) base(0)
		
* Plot absolute values:
	twoway ///
		(bar `barcall') ///
		(connected emp_decl1 year if model == "Full", msymbol(O) color(orange_red)) ///
		(scatter emp_decl1 year if model != "Full", msymbol(O) color(sienna)) ///
		(connected sel_lb1 year if model == "Full", msymbol(T) color(ebblue) lp(solid)) ///
		(scatter sel_lb1 year if model != "Full", msymbol(T) color(dknavy) lp(solid)) ///
		(line shareUnempSS year if inrange(year, 1992, 2016), color(gs7) yaxis(2)), ///
		/* design the plot and graph region*/ ///
		graphregion(color(white)) ///
		/* plotregion(margin(b=0 y=0)) */ ///
		///
		/* define the axis titles */ ///
		ytitle(" ") ytitle("Unemployment Rate (%)", angle(180) axis(2)) ///
		xtitle("") ///
		///
		/* define the axis labels */ ///
		ylabel(0(0.025)0.2, angle(0) format(%5.2f)) ///
		ylabel(0(2)10, angle(0) axis(2) format(%5.0f)) ///
		xlabel(1992(4)2016) xscale(titlegap(2)) ///
		///
		/* define the legend */ ///
		legend(rows(3) colfirst symxsize(*0.6) ///
			order( - "Decline in JFR from 0M to 6M" 2 "Yearly models" 3 "Pooled models" - "Selection effect (lower bound)" 4 "Yearly models" 5 "Pooled models") ///
			size(*0.8)) ///
		name(abs, replace)
		
	graph export "${output}/${id_code}_MainStatistics_TimeSeries_Selection.pdf", as(pdf) replace	
	
replace barUpper = 0.8	
* Plot relative values:
	twoway ///
		(bar `barcall') ///
		(connected sel_lb_ratio1 year if model == "Full", msymbol(T) color(ebblue) lp(solid)) ///
		(scatter sel_lb_ratio1 year if model != "Full", msymbol(T) color(dknavy) lp(solid)) ///
		(line shareUnempSS year if inrange(year, 1992, 2016), color(gs7) yaxis(2)), ///
		/* design the plot and graph region*/ ///
		graphregion(color(white)) ///
		/* plotregion(margin(b=0 y=0)) */ ///
		///
		/* define the axis titles */ ///
		ytitle("Fraction of decline in JFR explained by selection", size(medsmall)) ytitle("Unemployment Rate (%)", angle(180) axis(2)) ///
		xtitle("") ///
		///
		/* define the axis labels */ ///
		ylabel(0(0.1)0.8, angle(0) format(%5.2f)) ///
		ylabel(0(2)10, angle(0) axis(2) format(%5.0f)) ///
		xlabel(1992(4)2016) xscale(titlegap(2)) ///
		///
		/* define the legend */ ///
		legend(rows(1) colfirst symxsize(*0.6) ///
			order(2 "Yearly models" 3 "Pooled models") ///
			size(*0.8)) ///
		name(rel, replace)
		
	graph export "${output}/${id_code}_MainStatistics_TimeSeries_Selection_Relative.pdf", as(pdf) replace	
	